Sun Solutions 1997 April to September

home *** CD-ROM | disk | FTP | other *** search

/ Sun Solutions 1997 April to September / Sun Solutions CD - APR '97 - SEP '97 (704-3778-12 Rev. H)(Sun Microsystems, Inc.)(1997).iso / products / .wais / wais_SunSolutions / UPDATE_INDEX < prev next >

Wrap

Text File | 1997-02-28 | 3KB | 81 lines

#!/bin/sh
#
# Little script to index the file tree for WAIS searching.  We use find
# instead of the recursive option to waisindex because the waisindex option
# doesn't work well.
#
# HEY! - this script runs IN the wais data directory: it invokes ./waisindex
# relative to the current working directory.
#
# Exit status: 1 if DOCROOT is not a directory, otherwise the exit status
# of the final waisindex run.

########################################################################
# variables:
#
# INDEX_NAME  - the name of the index database.  It should match the name
#               in the /cgi-bin/newwais.pl file ($src).
# HTTP_SERVER - server hostname (and port) used to build the URLs
# DOCROOT     - root directory that httpd is serving out of.  It is both
#               the tree that gets scanned and the prefix stripped from
#               each filename when it is turned into a URL.
# DIRECTORIES - list of all the directories to index (documented here
#               historically; it is not set or referenced below)
#
# NOTE: the active commands below take every pathname from these variables.
#       The commented-out alternatives still carry their own site-specific
#       pathnames and must be adapted by hand if you re-enable them.

INDEX_NAME=/tmp/httpd/.wais/wais_SunSolutions/wais_SunSolutions
HTTP_SERVER=localhost:7999
DOCROOT="/tmp/httpd/.products"

#INDEX_NAME="/opt/db/wais/catalyst_catalog"
#HTTP_SERVER="pinatubo"
#DOCROOT="$CD_MOUNT/var/opt/WWW/NCSA/htdocs/CCx86-sparc"
#DOCROOT="/opt/db/wais-src"

# Without a document root every find below fails, the file list stays empty
# and waisindex would be run on nothing, so stop early instead.
if [ ! -d "$DOCROOT" ]
then
  echo "UPDATE_INDEX: document root '$DOCROOT' is not a directory" >&2
  exit 1
fi

########################################################################
# get rid of the temporary index file.  if a synonym file does not exist
# create a dummy one.
#
# Shouldn't need to worry about this section
#

# The glob is intentionally left unquoted: it removes every leftover list
# file whose name ends in "idxable" (e.g. .idxable and .notidxable).
rm -f -- "$INDEX_NAME".*idxable

if [ ! -f "${INDEX_NAME}.syn" ]
then
  # Quoted delimiter: the text is written literally, with no expansion.
  cat > "${INDEX_NAME}.syn" <<'EOF'
# synonym file. form is:
# word syn0 syn1 ...
# e.g.
# spam pork-shoulder yummy
dummy dummy
EOF
fi

########################################################################
# use find to add the filenames to a temp file.  if you add more file
# types (e.g. .gif is a file type) add the extension to the list below;
# you'll probably also want to update /cgi-bin/newwais.pl in your httpd
# directory so the search result is pretty.
#
# One pass per extension, in this order, so the list file keeps the same
# grouping as before: all *.html first, then *.ps, *.eps, *.txt, *.htm.
#
# Any path matching one of the exclusion patterns is dropped.  A pass that
# selects nothing makes grep return non-zero; that is normal and must not
# be treated as an error.
#
# NOTE(review): the leading dots in the patterns are regex wildcards, not
# literal dots, so ".bin" also matches "/bin" or "cabin".  They are kept
# as-is to preserve which files get indexed -- confirm whether literal
# dot-directories were intended before tightening them.
for ext in html ps eps txt htm
do
  find "$DOCROOT" -follow -name "*.$ext" -print |
    grep -E -v '.wais|.bin|.categories|SunSolutions' >> "${INDEX_NAME}.idxable"
done

########################################################################
# index the files using the temp file as input.  The URL substitution
# is a feature of freeWAIS .202 and up.  it transforms the filename
# into the correct URL so that relative URL's work.  The general
# form is -t URL <what to strip off the front> <what to add to the front>
#
# notes:
#
# * use -a on the subsequent index runs to keep appending to the index file
# * -nocontents tells the indexer to only use the filename...the file
#   contents is ignored

./waisindex -d "$INDEX_NAME" -export \
  -t URL "$DOCROOT" "http://$HTTP_SERVER" \
  -stdin < "${INDEX_NAME}.idxable"

#./waisindex -a -nocontents -d $INDEX_NAME -export -t URL $CD_MOUNT/var/opt/WWW/NCSA/htdocs http://$HTTP_SERVER -stdin < $INDEX_NAME.notidxable